In [1]:
!pip install pandas==2.3.0
Requirement already satisfied: pandas==2.3.0 in c:\users\aditya singh\anaconda3\lib\site-packages (2.3.0)
Requirement already satisfied: numpy>=1.26.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (1.26.4)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas==2.3.0) (1.16.0)
In [2]:
pip install numpy==1.26.4
Requirement already satisfied: numpy==1.26.4 in c:\users\aditya singh\anaconda3\lib\site-packages (1.26.4)
Note: you may need to restart the kernel to use updated packages.
In [3]:
!pip install numpy==1.26.4 --upgrade --force-reinstall
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
Successfully installed numpy-1.26.4
In [4]:
!pip install matplotlib==3.10.3
!pip install seaborn==0.13.2
!pip install plotly==6.1.2
Requirement already satisfied: matplotlib==3.10.3 in c:\users\aditya singh\anaconda3\lib\site-packages (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.4.4)
Requirement already satisfied: numpy>=1.23 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.26.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib==3.10.3) (1.16.0)
Requirement already satisfied: seaborn==0.13.2 in c:\users\aditya singh\anaconda3\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (1.26.4)
Requirement already satisfied: pandas>=1.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (2.3.0)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.16.0)
Collecting plotly==6.1.2
  Using cached plotly-6.1.2-py3-none-any.whl.metadata (6.9 kB)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (1.42.0)
Requirement already satisfied: packaging in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (24.1)
Using cached plotly-6.1.2-py3-none-any.whl (16.3 MB)
Installing collected packages: plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 6.2.0
    Uninstalling plotly-6.2.0:
      Successfully uninstalled plotly-6.2.0
Successfully installed plotly-6.1.2
In [5]:
!pip install pandas==2.3.0
!pip install numpy==2.3.0
!pip install matplotlib==3.10.3
!pip install seaborn==0.13.2
!pip install plotly==6.1.2
Requirement already satisfied: pandas==2.3.0 in c:\users\aditya singh\anaconda3\lib\site-packages (2.3.0)
Requirement already satisfied: numpy>=1.26.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (1.26.4)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas==2.3.0) (1.16.0)
Collecting numpy==2.3.0
  Using cached numpy-2.3.0-cp312-cp312-win_amd64.whl.metadata (60 kB)
Using cached numpy-2.3.0-cp312-cp312-win_amd64.whl (12.7 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
Successfully installed numpy-2.3.0
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.3.0 which is incompatible.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.3.0 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.0 which is incompatible.
scipy 1.13.1 requires numpy<2.3,>=1.22.4, but you have numpy 2.3.0 which is incompatible.
Requirement already satisfied: matplotlib==3.10.3 in c:\users\aditya singh\anaconda3\lib\site-packages (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.4.4)
Requirement already satisfied: numpy>=1.23 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (2.3.0)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (2.9.0.post0)
Collecting numpy>=1.23 (from matplotlib==3.10.3)
  Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib==3.10.3) (1.16.0)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.3.0
    Uninstalling numpy-2.3.0:
      Successfully uninstalled numpy-2.3.0
Successfully installed numpy-1.26.4
Requirement already satisfied: seaborn==0.13.2 in c:\users\aditya singh\anaconda3\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (1.26.4)
Requirement already satisfied: pandas>=1.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (2.3.0)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.16.0)
Requirement already satisfied: plotly==6.1.2 in c:\users\aditya singh\anaconda3\lib\site-packages (6.1.2)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (1.42.0)
Requirement already satisfied: packaging in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (24.1)
In [6]:
!pip check
No broken requirements found.
In [7]:
pip install --upgrade pandas matplotlib seaborn plotly
Requirement already satisfied: pandas in c:\users\aditya singh\anaconda3\lib\site-packages (2.3.0)
Requirement already satisfied: matplotlib in c:\users\aditya singh\anaconda3\lib\site-packages (3.10.3)
Requirement already satisfied: seaborn in c:\users\aditya singh\anaconda3\lib\site-packages (0.13.2)
Requirement already satisfied: plotly in c:\users\aditya singh\anaconda3\lib\site-packages (6.1.2)
Collecting plotly
  Using cached plotly-6.2.0-py3-none-any.whl.metadata (8.5 kB)
Requirement already satisfied: numpy>=1.26.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (1.26.4)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (2023.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (3.1.2)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly) (1.42.0)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
Using cached plotly-6.2.0-py3-none-any.whl (9.6 MB)
Installing collected packages: plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 6.1.2
    Uninstalling plotly-6.1.2:
      Successfully uninstalled plotly-6.1.2
Successfully installed plotly-6.2.0
Note: you may need to restart the kernel to use updated packages.
In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from warnings import filterwarnings
filterwarnings('ignore')
In [9]:
print("\033[1mFile Uploading\033[0m")
File Uploading
In [10]:
ipl_data = pd.read_csv("IPL.csv")
In [11]:
ipl_data.head()
Out[11]:
Unnamed: 0 match_id date match_type event_name innings batting_team bowling_team over ball ... team_runs team_balls team_wicket new_batter batter_runs batter_balls bowler_wicket batting_partners next_batter striker_out
0 131970 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 1 ... 1 1 0 NaN 0 1 0 ('BB McCullum', 'SC Ganguly') NaN False
1 131971 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 2 ... 1 2 0 NaN 0 1 0 ('BB McCullum', 'SC Ganguly') NaN False
2 131972 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 3 ... 2 2 0 NaN 0 1 0 ('BB McCullum', 'SC Ganguly') NaN False
3 131973 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 3 ... 2 3 0 NaN 0 2 0 ('BB McCullum', 'SC Ganguly') NaN False
4 131974 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 4 ... 2 4 0 NaN 0 3 0 ('BB McCullum', 'SC Ganguly') NaN False

5 rows × 64 columns

In [12]:
ipl_data.shape
Out[12]:
(278205, 64)
In [13]:
ipl_data.columns
Out[13]:
Index(['Unnamed: 0', 'match_id', 'date', 'match_type', 'event_name', 'innings',
       'batting_team', 'bowling_team', 'over', 'ball', 'ball_no', 'batter',
       'bat_pos', 'runs_batter', 'balls_faced', 'bowler', 'valid_ball',
       'runs_extras', 'runs_total', 'runs_bowler', 'runs_not_boundary',
       'extra_type', 'non_striker', 'non_striker_pos', 'wicket_kind',
       'player_out', 'fielders', 'runs_target', 'review_batter',
       'team_reviewed', 'review_decision', 'umpire', 'umpires_call',
       'player_of_match', 'match_won_by', 'win_outcome', 'toss_winner',
       'toss_decision', 'venue', 'city', 'day', 'month', 'year', 'season',
       'gender', 'team_type', 'superover_winner', 'result_type', 'method',
       'balls_per_over', 'overs', 'event_match_no', 'stage', 'match_number',
       'team_runs', 'team_balls', 'team_wicket', 'new_batter', 'batter_runs',
       'batter_balls', 'bowler_wicket', 'batting_partners', 'next_batter',
       'striker_out'],
      dtype='object')
In [14]:
print("\033[1mData Cleaning\033[0m")
Data Cleaning
In [15]:
ipl_data.head()
ipl_data.info()
ipl_data.describe()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 278205 entries, 0 to 278204
Data columns (total 64 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Unnamed: 0         278205 non-null  int64  
 1   match_id           278205 non-null  int64  
 2   date               278205 non-null  object 
 3   match_type         278205 non-null  object 
 4   event_name         278205 non-null  object 
 5   innings            278205 non-null  int64  
 6   batting_team       278205 non-null  object 
 7   bowling_team       278205 non-null  object 
 8   over               278205 non-null  int64  
 9   ball               278205 non-null  int64  
 10  ball_no            278205 non-null  float64
 11  batter             278205 non-null  object 
 12  bat_pos            278205 non-null  int64  
 13  runs_batter        278205 non-null  int64  
 14  balls_faced        278205 non-null  int64  
 15  bowler             278205 non-null  object 
 16  valid_ball         278205 non-null  int64  
 17  runs_extras        278205 non-null  int64  
 18  runs_total         278205 non-null  int64  
 19  runs_bowler        278205 non-null  int64  
 20  runs_not_boundary  278205 non-null  bool   
 21  extra_type         15133 non-null   object 
 22  non_striker        278205 non-null  object 
 23  non_striker_pos    278205 non-null  int64  
 24  wicket_kind        13823 non-null   object 
 25  player_out         13823 non-null   object 
 26  fielders           10013 non-null   object 
 27  runs_target        133903 non-null  float64
 28  review_batter      872 non-null     object 
 29  team_reviewed      872 non-null     object 
 30  review_decision    872 non-null     object 
 31  umpire             872 non-null     object 
 32  umpires_call       278205 non-null  bool   
 33  player_of_match    278205 non-null  object 
 34  match_won_by       278205 non-null  object 
 35  win_outcome        273503 non-null  object 
 36  toss_winner        278205 non-null  object 
 37  toss_decision      278205 non-null  object 
 38  venue              278205 non-null  object 
 39  city               278205 non-null  object 
 40  day                278205 non-null  int64  
 41  month              278205 non-null  int64  
 42  year               278205 non-null  int64  
 43  season             278205 non-null  object 
 44  gender             278205 non-null  object 
 45  team_type          278205 non-null  object 
 46  superover_winner   3896 non-null    object 
 47  result_type        4702 non-null    object 
 48  method             3890 non-null    object 
 49  balls_per_over     278205 non-null  int64  
 50  overs              278205 non-null  int64  
 51  event_match_no     278205 non-null  object 
 52  stage              278205 non-null  object 
 53  match_number       278205 non-null  object 
 54  team_runs          278205 non-null  int64  
 55  team_balls         278205 non-null  int64  
 56  team_wicket        278205 non-null  int64  
 57  new_batter         13321 non-null   object 
 58  batter_runs        278205 non-null  int64  
 59  batter_balls       278205 non-null  int64  
 60  bowler_wicket      278205 non-null  int64  
 61  batting_partners   278205 non-null  object 
 62  next_batter        13321 non-null   object 
 63  striker_out        278205 non-null  bool   
dtypes: bool(3), float64(2), int64(24), object(35)
memory usage: 130.3+ MB
Out[15]:
Unnamed: 0 match_id innings over ball ball_no bat_pos runs_batter balls_faced valid_ball ... month year balls_per_over overs team_runs team_balls team_wicket batter_runs batter_balls bowler_wicket
count 278205.000000 2.782050e+05 278205.000000 278205.000000 278205.000000 278205.000000 278205.000000 278205.000000 278205.000000 278205.000000 ... 278205.000000 278205.000000 278205.0 278205.0 278205.000000 278205.000000 278205.000000 278205.000000 278205.000000 278205.000000
mean 139102.000000 9.422687e+05 1.482914 9.193839 3.488855 9.542725 3.612555 1.277378 0.967362 0.963182 ... 4.787933 2016.710178 6.0 20.0 77.110498 58.614637 2.456972 18.327424 14.011211 0.045470
std 80311.010157 3.817198e+05 0.502571 5.681511 1.708263 5.682938 2.168978 1.651107 0.177687 0.188315 ... 1.586724 5.248572 0.0 0.0 49.957873 34.117619 2.100374 18.578093 11.833930 0.208333
min 0.000000 3.359820e+05 1.000000 0.000000 1.000000 0.100000 1.000000 0.000000 0.000000 0.000000 ... 3.000000 2008.000000 6.0 20.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 69551.000000 5.483530e+05 1.000000 4.000000 2.000000 4.500000 2.000000 0.000000 1.000000 1.000000 ... 4.000000 2012.000000 6.0 20.0 36.000000 29.000000 1.000000 4.000000 5.000000 0.000000
50% 139102.000000 1.082601e+06 1.000000 9.000000 3.000000 9.400000 3.000000 1.000000 1.000000 1.000000 ... 4.000000 2017.000000 6.0 20.0 73.000000 58.000000 2.000000 12.000000 11.000000 0.000000
75% 208653.000000 1.304049e+06 2.000000 14.000000 5.000000 14.400000 5.000000 1.000000 1.000000 1.000000 ... 5.000000 2022.000000 6.0 20.0 113.000000 88.000000 4.000000 27.000000 20.000000 0.000000
max 278204.000000 1.485779e+06 6.000000 19.000000 7.000000 19.600000 11.000000 6.000000 1.000000 1.000000 ... 11.000000 2025.000000 6.0 20.0 287.000000 121.000000 10.000000 175.000000 73.000000 1.000000

8 rows × 26 columns

In [16]:
ipl_data.isnull().sum()
Out[16]:
Unnamed: 0               0
match_id                 0
date                     0
match_type               0
event_name               0
                     ...  
batter_balls             0
bowler_wicket            0
batting_partners         0
next_batter         264884
striker_out              0
Length: 64, dtype: int64
In [17]:
ipl_data.drop_duplicates(inplace=True)
In [18]:
ipl_data.reset_index(drop=True, inplace=True)
In [19]:
print("\033[1mData Overview\033[0m")
Data Overview
In [20]:
ipl_data.head(50)
Out[20]:
Unnamed: 0 match_id date match_type event_name innings batting_team bowling_team over ball ... team_runs team_balls team_wicket new_batter batter_runs batter_balls bowler_wicket batting_partners next_batter striker_out
0 131970 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 1 ... 1 1 0 NaN 0 1 0 ('BB McCullum', 'SC Ganguly') NaN False
1 131971 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 2 ... 1 2 0 NaN 0 1 0 ('BB McCullum', 'SC Ganguly') NaN False
2 131972 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 3 ... 2 2 0 NaN 0 1 0 ('BB McCullum', 'SC Ganguly') NaN False
3 131973 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 3 ... 2 3 0 NaN 0 2 0 ('BB McCullum', 'SC Ganguly') NaN False
4 131974 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 4 ... 2 4 0 NaN 0 3 0 ('BB McCullum', 'SC Ganguly') NaN False
5 131975 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 5 ... 2 5 0 NaN 0 4 0 ('BB McCullum', 'SC Ganguly') NaN False
6 131976 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 0 6 ... 3 6 0 NaN 0 5 0 ('BB McCullum', 'SC Ganguly') NaN False
7 131977 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 1 1 ... 3 7 0 NaN 0 6 0 ('BB McCullum', 'SC Ganguly') NaN False
8 131978 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 1 2 ... 7 8 0 NaN 4 7 0 ('BB McCullum', 'SC Ganguly') NaN False
9 131979 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 1 3 ... 11 9 0 NaN 8 8 0 ('BB McCullum', 'SC Ganguly') NaN False
10 131980 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 1 4 ... 17 10 0 NaN 14 9 0 ('BB McCullum', 'SC Ganguly') NaN False
11 131981 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 1 5 ... 21 11 0 NaN 18 10 0 ('BB McCullum', 'SC Ganguly') NaN False
12 131982 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 1 6 ... 21 12 0 NaN 18 11 0 ('BB McCullum', 'SC Ganguly') NaN False
13 131983 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 2 1 ... 21 13 0 NaN 0 2 0 ('BB McCullum', 'SC Ganguly') NaN False
14 131984 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 2 2 ... 21 14 0 NaN 0 3 0 ('BB McCullum', 'SC Ganguly') NaN False
15 131985 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 2 3 ... 22 15 0 NaN 0 4 0 ('BB McCullum', 'SC Ganguly') NaN False
16 131986 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 2 4 ... 26 16 0 NaN 22 12 0 ('BB McCullum', 'SC Ganguly') NaN False
17 131987 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 2 5 ... 27 17 0 NaN 23 13 0 ('BB McCullum', 'SC Ganguly') NaN False
18 131988 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 2 6 ... 27 18 0 NaN 0 5 0 ('BB McCullum', 'SC Ganguly') NaN False
19 131989 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 3 1 ... 32 18 0 NaN 23 13 0 ('BB McCullum', 'SC Ganguly') NaN False
20 131990 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 3 1 ... 38 19 0 NaN 29 14 0 ('BB McCullum', 'SC Ganguly') NaN False
21 131991 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 3 2 ... 39 20 0 NaN 29 15 0 ('BB McCullum', 'SC Ganguly') NaN False
22 131992 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 3 3 ... 43 21 0 NaN 4 6 0 ('BB McCullum', 'SC Ganguly') NaN False
23 131993 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 3 4 ... 43 22 0 NaN 4 7 0 ('BB McCullum', 'SC Ganguly') NaN False
24 131994 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 3 5 ... 44 23 0 NaN 5 8 0 ('BB McCullum', 'SC Ganguly') NaN False
25 131995 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 3 6 ... 50 24 0 NaN 35 16 0 ('BB McCullum', 'SC Ganguly') NaN False
26 131996 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 4 1 ... 54 25 0 NaN 9 9 0 ('BB McCullum', 'SC Ganguly') NaN False
27 131997 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 4 2 ... 55 26 0 NaN 10 10 0 ('BB McCullum', 'SC Ganguly') NaN False
28 131998 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 4 3 ... 59 27 0 NaN 39 17 0 ('BB McCullum', 'SC Ganguly') NaN False
29 131999 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 4 4 ... 59 28 0 NaN 39 18 0 ('BB McCullum', 'SC Ganguly') NaN False
30 132000 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 4 5 ... 60 29 0 NaN 40 19 0 ('BB McCullum', 'SC Ganguly') NaN False
31 132001 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 4 6 ... 60 30 0 NaN 10 11 0 ('BB McCullum', 'SC Ganguly') NaN False
32 132002 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 5 1 ... 61 31 0 NaN 41 20 0 ('BB McCullum', 'SC Ganguly') NaN False
33 132003 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 5 2 ... 61 32 1 NaN 10 12 1 ('BB McCullum', 'SC Ganguly') RT Ponting True
34 132004 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 5 3 ... 61 33 1 RT Ponting 0 1 0 ('BB McCullum', 'RT Ponting') NaN False
35 132005 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 5 4 ... 61 34 1 NaN 0 2 0 ('BB McCullum', 'RT Ponting') NaN False
36 132006 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 5 5 ... 61 35 1 NaN 0 3 0 ('BB McCullum', 'RT Ponting') NaN False
37 132007 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 5 6 ... 61 36 1 NaN 0 4 0 ('BB McCullum', 'RT Ponting') NaN False
38 132008 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 6 1 ... 62 37 1 NaN 42 21 0 ('BB McCullum', 'RT Ponting') NaN False
39 132009 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 6 2 ... 63 38 1 NaN 1 5 0 ('BB McCullum', 'RT Ponting') NaN False
40 132010 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 6 3 ... 64 39 1 NaN 43 22 0 ('BB McCullum', 'RT Ponting') NaN False
41 132011 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 6 4 ... 66 40 1 NaN 3 6 0 ('BB McCullum', 'RT Ponting') NaN False
42 132012 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 6 5 ... 67 41 1 NaN 4 7 0 ('BB McCullum', 'RT Ponting') NaN False
43 132013 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 6 6 ... 68 42 1 NaN 44 23 0 ('BB McCullum', 'RT Ponting') NaN False
44 132014 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 7 1 ... 68 43 1 NaN 44 24 0 ('BB McCullum', 'RT Ponting') NaN False
45 132015 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 7 2 ... 69 44 1 NaN 45 25 0 ('BB McCullum', 'RT Ponting') NaN False
46 132016 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 7 3 ... 70 45 1 NaN 5 8 0 ('BB McCullum', 'RT Ponting') NaN False
47 132017 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 7 4 ... 71 46 1 NaN 46 26 0 ('BB McCullum', 'RT Ponting') NaN False
48 132018 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 7 5 ... 72 47 1 NaN 6 9 0 ('BB McCullum', 'RT Ponting') NaN False
49 132019 335982 2008-04-18 T20 Indian Premier League 1 Kolkata Knight Riders Royal Challengers Bangalore 7 6 ... 73 48 1 NaN 47 27 0 ('BB McCullum', 'RT Ponting') NaN False

50 rows × 64 columns

In [21]:
ipl_data.shape
Out[21]:
(278205, 64)
In [22]:
ipl_data.columns
Out[22]:
Index(['Unnamed: 0', 'match_id', 'date', 'match_type', 'event_name', 'innings',
       'batting_team', 'bowling_team', 'over', 'ball', 'ball_no', 'batter',
       'bat_pos', 'runs_batter', 'balls_faced', 'bowler', 'valid_ball',
       'runs_extras', 'runs_total', 'runs_bowler', 'runs_not_boundary',
       'extra_type', 'non_striker', 'non_striker_pos', 'wicket_kind',
       'player_out', 'fielders', 'runs_target', 'review_batter',
       'team_reviewed', 'review_decision', 'umpire', 'umpires_call',
       'player_of_match', 'match_won_by', 'win_outcome', 'toss_winner',
       'toss_decision', 'venue', 'city', 'day', 'month', 'year', 'season',
       'gender', 'team_type', 'superover_winner', 'result_type', 'method',
       'balls_per_over', 'overs', 'event_match_no', 'stage', 'match_number',
       'team_runs', 'team_balls', 'team_wicket', 'new_batter', 'batter_runs',
       'batter_balls', 'bowler_wicket', 'batting_partners', 'next_batter',
       'striker_out'],
      dtype='object')
In [23]:
ipl_data['date'] = pd.to_datetime(ipl_data['date'], errors='coerce')
ipl_data['season'] = ipl_data['date'].dt.year
ipl_data = ipl_data[(ipl_data['season'] >= 2008) & (ipl_data['season'] <= 2025)]
In [24]:
ipl_data['season'].nunique()
Out[24]:
18
In [25]:
ipl_data['season'].unique()
Out[25]:
array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
       2019, 2020, 2021, 2022, 2023, 2024, 2025])
In [26]:
ipl_data['season'].value_counts().to_frame().T
Out[26]:
season 2013 2022 2023 2012 2025 2024 2011 2020 2010 2021 2019 2014 2018 2016 2017 2015 2009 2008
count 18177 17912 17863 17767 17285 17103 17013 14559 14498 14425 14312 14300 14286 14096 13862 13652 13606 13489
In [27]:
print(ipl_data['season'].value_counts().sort_index())
season
2008    13489
2009    13606
2010    14498
2011    17013
2012    17767
2013    18177
2014    14300
2015    13652
2016    14096
2017    13862
2018    14286
2019    14312
2020    14559
2021    14425
2022    17912
2023    17863
2024    17103
2025    17285
Name: count, dtype: int64
In [28]:
balls_per_season = ipl_data['season'].value_counts().sort_index()

plt.figure(figsize=(12, 6))
plt.bar(balls_per_season.index, balls_per_season.values, color='skyblue')
plt.title('Total Balls Played Per Season', fontsize=16, fontweight='bold')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Total Balls Played', fontsize=12)
plt.xticks(rotation=45)
for i, value in enumerate(balls_per_season.values):
    plt.text(balls_per_season.index[i], value, str(value), ha='center', va='bottom', fontsize=9)

plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [29]:
unique_matches = ipl_data[['match_id', 'season']].drop_duplicates()
unique_matches['season'] = unique_matches['season'].astype(str).str.strip()
unique_matches = unique_matches[unique_matches['season'].str.isnumeric()]
unique_matches['season'] = unique_matches['season'].astype(int)
matches_per_season = unique_matches['season'].value_counts().sort_index()
print(matches_per_season)
season
2008    58
2009    57
2010    60
2011    73
2012    74
2013    76
2014    60
2015    59
2016    60
2017    59
2018    60
2019    60
2020    60
2021    60
2022    74
2023    74
2024    71
2025    74
Name: count, dtype: int64
In [30]:
matches_per_season = ipl_data[['match_id', 'season']].drop_duplicates()
matches_count = matches_per_season['season'].value_counts().sort_index()

plt.figure(figsize=(12, 6))
plt.bar(matches_count.index, matches_count.values, color='lightgreen')
plt.title('Total Matches Played Per Season', fontsize=16, fontweight='bold')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.xticks(rotation=45)

for i, value in enumerate(matches_count.values):
    plt.text(matches_count.index[i], value, str(value), ha='center', va='bottom', fontsize=9)

plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [31]:
matches_per_city = ipl_data[['match_id', 'city']].drop_duplicates()
city_match_counts = matches_per_city['city'].value_counts().sort_values(ascending=False)
print(city_match_counts)
city
Mumbai            180
Kolkata           100
Delhi              97
Chennai            91
Hyderabad          83
Bangalore          65
Jaipur             64
Chandigarh         61
Unknown            51
Pune               51
Ahmedabad          45
Abu Dhabi          37
Bengaluru          34
Lucknow            22
Visakhapatnam      17
Dharamsala         15
Durban             15
Dubai              13
Centurion          12
Sharjah            10
Rajkot             10
Indore              9
Mohali              9
Navi Mumbai         9
Johannesburg        8
Cuttack             7
Port Elizabeth      7
Cape Town           7
Ranchi              7
Raipur              6
Guwahati            5
Kochi               5
Kanpur              4
Nagpur              3
East London         3
Kimberley           3
Bloemfontein        2
New Chandigarh      2
Name: count, dtype: int64
In [32]:
matches_per_city = ipl_data[['match_id', 'city']].drop_duplicates()
city_match_counts = matches_per_city['city'].value_counts()
city_match_counts_df = city_match_counts.reset_index()
city_match_counts_df.columns = ['City', 'Matches_Hosted']

plt.figure(figsize=(14, 10))
sns.barplot(x='Matches_Hosted', y='City', data=city_match_counts_df, palette='viridis')

plt.title('Total Matches Hosted by Each City (All Venues)', fontsize=16, fontweight='bold')
plt.xlabel('Number of Matches', fontsize=12)
plt.ylabel('City', fontsize=12)
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [33]:
second_innings = ipl_data[ipl_data['innings'] == 2]
target_data = second_innings.groupby('match_id')['runs_total'].sum().reset_index()
target_data.columns = ['Match_ID', 'Target_Runs']

target_data.head()
Out[33]:
Match_ID Target_Runs
0 335982 82
1 335983 207
2 335984 132
3 335985 166
4 335986 112
In [34]:
plt.figure(figsize=(10, 6))
sns.histplot(target_data['Target_Runs'], bins=30, kde=True, color='orange')

plt.title('Distribution of Target Runs in IPL (2nd Innings)', fontsize=16, fontweight='bold')
plt.xlabel('Target Runs', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [35]:
second_innings = ipl_data[ipl_data['innings'] == 2]

target_by_match = second_innings.groupby(['match_id', 'season'])['runs_total'].sum().reset_index()
avg_target_by_season = target_by_match.groupby('season')['runs_total'].mean().reset_index()
avg_target_by_season.columns = ['Season', 'Avg_Target_Runs']
avg_target_by_season.head(18)
Out[35]:
Season Avg_Target_Runs
0 2008 148.293103
1 2009 136.052632
2 2010 149.616667
3 2011 139.319444
4 2012 145.878378
5 2013 140.697368
6 2014 152.083333
7 2015 146.948276
8 2016 151.766667
9 2017 152.338983
10 2018 159.216667
11 2019 156.600000
12 2020 153.033333
13 2021 151.050000
14 2022 158.540541
15 2023 166.657534
16 2024 176.197183
17 2025 174.013889
In [36]:
plt.figure(figsize=(12, 6))
sns.lineplot(data=avg_target_by_season, x='Season', y='Avg_Target_Runs', marker='o', color='crimson')

plt.title('Trend of Average Target Runs by Season', fontsize=16, fontweight='bold')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Average Target Runs', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)

# Annotate values on the line
for i, row in avg_target_by_season.iterrows():
    plt.text(row['Season'], row['Avg_Target_Runs'] + 1, f"{row['Avg_Target_Runs']:.0f}",
             ha='center', va='bottom', fontsize=9, color='black')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [37]:
matches_df = ipl_data.drop_duplicates(subset='match_id')
matches_df = matches_df.dropna(subset=['win_outcome'])
matches_df['win_outcome'] = matches_df['win_outcome'].astype(str).str.strip().str.lower()
matches_df['win_type'] = matches_df['win_outcome'].apply(
    lambda x: 'runs' if 'runs' in x else ('wickets' if 'wickets' in x else None)
)
matches_df['win_margin'] = matches_df['win_outcome'].str.extract(r'(\d+)')
matches_df['win_margin'] = pd.to_numeric(matches_df['win_margin'], errors='coerce')
won_by_runs = matches_df[matches_df['win_type'] == 'runs'][['season', 'match_id', 'win_margin']].copy()
won_by_wickets = matches_df[matches_df['win_type'] == 'wickets'][['season', 'match_id', 'win_margin']].copy()
won_by_runs.rename(columns={'win_margin': 'Win_Margin_Runs'}, inplace=True)
won_by_wickets.rename(columns={'win_margin': 'Win_Margin_Wickets'}, inplace=True)
print("🏏 Won by Runs Sample:\n", won_by_runs.head())
print("🏏 Won by Wickets Sample:\n", won_by_wickets.head())
🏏 Won by Runs Sample:
       season  match_id  Win_Margin_Runs
0       2008    335982              140
225     2008    335983               33
1624    2008    335989                6
2127    2008    335991               66
3289    2008    335996               13
🏏 Won by Wickets Sample:
       season  match_id  Win_Margin_Wickets
473     2008    335984                   9
692     2008    335985                   5
938     2008    335986                   5
1178    2008    335987                   6
1419    2008    335988                   9
In [38]:
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
sns.histplot(won_by_runs['Win_Margin_Runs'], bins=30, kde=True, color='orangered')
plt.title('Distribution of Result Margin - Won by Runs', fontsize=16)
plt.xlabel('Win Margin (Runs)', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [39]:
plt.figure(figsize=(10, 6))
sns.countplot(x='Win_Margin_Wickets', data=won_by_wickets, palette='Blues')
plt.title('Distribution of Result Margin - Won by Wickets', fontsize=16)
plt.xlabel('Win Margin (Wickets)', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [40]:
matches_df = ipl_data.drop_duplicates(subset='match_id')
matches_df['win_outcome'] = matches_df['win_outcome'].astype(str).str.strip().str.lower()
def extract_margin_type(value):
    try:
        if pd.notna(value) and ('runs' in value or 'wickets' in value):
            parts = value.split()
            return pd.Series([int(parts[0]), parts[1]])
    except:
        return pd.Series([None, None])
    return pd.Series([None, None])
matches_df[['margin', 'margin_type']] = matches_df['win_outcome'].apply(extract_margin_type)
matches_df = matches_df.dropna(subset=['margin'])
matches_df['margin'] = matches_df['margin'].astype(int)
In [41]:
avg_margin_by_season = matches_df.groupby(['season', 'margin_type'])['margin'].mean().reset_index()
avg_margin_by_season.columns = ['Season', 'Margin_Type', 'Average_Margin']
avg_margin_by_season.head(36)
Out[41]:
Season Margin_Type Average_Margin
0 2008 runs 29.375000
1 2008 wickets 6.500000
2 2009 runs 28.296296
3 2009 wickets 6.206897
4 2010 runs 31.483871
5 2010 wickets 6.785714
6 2011 runs 33.272727
7 2011 wickets 6.794872
8 2012 runs 28.235294
9 2012 wickets 6.025000
10 2013 runs 33.540541
11 2013 wickets 6.135135
12 2014 runs 29.272727
13 2014 wickets 6.081081
14 2015 runs 26.562500
15 2015 wickets 6.166667
16 2016 runs 32.190476
17 2016 wickets 6.256410
18 2017 runs 30.307692
19 2017 wickets 6.375000
20 2018 runs 24.107143
21 2018 wickets 5.812500
22 2019 runs 30.227273
23 2019 wickets 5.771429
24 2020 runs 39.370370
25 2020 wickets 6.965517
26 2021 runs 26.454545
27 2021 wickets 5.918919
28 2022 runs 27.945946
29 2022 wickets 6.000000
30 2023 runs 30.400000
31 2023 wickets 5.727273
32 2024 runs 30.142857
33 2024 wickets 5.944444
34 2025 runs 33.181818
35 2025 wickets 6.324324
In [42]:
plt.figure(figsize=(12, 6))
sns.lineplot(data=avg_margin_by_season, x='Season', y='Average_Margin', hue='Margin_Type', marker='o')

plt.title('Average Match Win Margin by Season (Runs vs Wickets)', fontsize=14)
plt.xlabel('Season')
plt.ylabel('Average Win Margin')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend(title='Margin Type')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [43]:
matches_df = ipl_data.drop_duplicates(subset='match_id')
matches_df = matches_df.dropna(subset=['toss_winner', 'toss_decision', 'match_won_by'])
matches_df['toss_decision'] = matches_df['toss_decision'].astype(str).str.strip().str.lower()
matches_df['won_after_toss'] = matches_df['toss_winner'] == matches_df['match_won_by']
toss_win_stats = matches_df.groupby('toss_decision')['won_after_toss'].value_counts(normalize=True).unstack().fillna(0) * 100
toss_win_stats.columns = ['Lost Match', 'Won Match']
toss_win_stats = toss_win_stats.reset_index()
toss_win_stats
Out[43]:
toss_decision Lost Match Won Match
0 bat 54.814815 45.185185
1 field 46.596859 53.403141
In [44]:
toss_win_stats_melted = toss_win_stats.melt(id_vars='toss_decision', value_vars=['Won Match', 'Lost Match'], 
                                            var_name='Match Outcome', value_name='Percentage')

plt.figure(figsize=(10, 6))
sns.barplot(data=toss_win_stats_melted, x='toss_decision', y='Percentage', hue='Match Outcome')

plt.title('Percentage of Matches Won Based on Toss Decision', fontsize=14)
plt.xlabel('Toss Decision')
plt.ylabel('Percentage (%)')
plt.grid(axis='y')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [45]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
super_over_df = df[df['superover_winner'].notna()]
super_over_matches = super_over_df.drop_duplicates(subset=['match_id'])
def clean_season(season):
    if isinstance(season, str) and '/' in season:
        return int(season.split('/')[0])  # Take first part before '/'
    return int(season)

super_over_matches['season'] = super_over_matches['season'].apply(clean_season)
super_over_by_season = super_over_matches.groupby('season')['match_id'].nunique().reset_index()
super_over_by_season.columns = ['Season', 'Super_Over_Matches']
super_over_by_season = super_over_by_season.sort_values(by='Season').reset_index(drop=True)
print(super_over_by_season)
   Season  Super_Over_Matches
0    2009                   2
1    2013                   2
2    2014                   1
3    2015                   1
4    2017                   1
5    2019                   2
6    2020                   4
7    2021                   1
8    2025                   1
In [46]:
plt.figure(figsize=(10, 6))
sns.barplot(data=super_over_by_season, x='Season', y='Super_Over_Matches', palette='mako')

plt.title('Super Over Matches by IPL Season', fontsize=16)
plt.xlabel('Season')
plt.ylabel('Super Over Matches')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [47]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
umpire_df = df[['match_id', 'umpire', 'umpires_call']].drop_duplicates()
umpire_list = pd.concat([
    umpire_df[['match_id', 'umpire']].rename(columns={'umpire': 'Umpire'}),
    umpire_df[['match_id', 'umpires_call']].rename(columns={'umpires_call': 'Umpire'})
])
umpire_list = umpire_list.dropna()
umpire_list['Umpire'] = umpire_list['Umpire'].astype(str).str.strip()
umpire_list = umpire_list[~umpire_list['Umpire'].isin(['True', 'False'])]
umpire_counts = umpire_list.groupby('Umpire')['match_id'].nunique().reset_index()
umpire_counts.columns = ['Umpire', 'Match_Count']
top_umpires = umpire_counts.sort_values(by='Match_Count', ascending=False).head(10).reset_index(drop=True)
print(top_umpires)
                  Umpire  Match_Count
0            Nitin Menon           44
1           AK Chaudhary           43
2  KN Ananthapadmanabhan           38
3            CB Gaffaney           37
4              VK Sharma           35
5              UV Gandhe           26
6               YC Barde           23
7          J Madanagopal           23
8               R Pandit           20
9   MV Saidharshan Kumar           18
In [48]:
plt.figure(figsize=(10, 6))
sns.barplot(data=top_umpires, y='Umpire', x='Match_Count', palette='viridis')

plt.title('Top 10 IPL Umpires by Match Count (All Seasons)', fontsize=16)
plt.xlabel('Match Count')
plt.ylabel('Umpire')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [49]:
print("Total matches in dataset:", df['match_id'].nunique())
with_umpire = df[df['umpire'].notna() | df['umpires_call'].notna()]
print("Matches with umpire info:", with_umpire['match_id'].nunique())
Total matches in dataset: 1169
Matches with umpire info: 1169
In [50]:
print(df['umpire'].dropna().unique())
print(df['umpires_call'].dropna().unique())
['CB Gaffaney' 'RJ Tucker' 'C Shamshuddin' 'A Deshmukh' 'VA Kulkarni'
 'AK Chaudhary' 'CK Nandan' 'NJ Llong' 'S Ravi' 'Nitin Menon'
 'KN Ananthapadmanabhan' 'A Nand Kishore' 'VK Sharma' 'YC Barde'
 'M Erasmus' 'HDPK Dharmasena' 'BNJ Oxenford' 'AY Dandekar' 'UV Gandhe'
 'IJ Gould' 'PR Reiffel' 'RK Illingworth' 'K Srinivasan' 'PG Pathak'
 'J Madanagopal' 'Navdeep Singh' 'Tapan Sharma' 'HAS Khalid' 'MA Gough'
 'N Pandit' 'R Pandit' 'Chirra Ravikanthreddy' 'NA Patwardhan'
 'GR Sadashiv Iyer' 'Vinod Seshan' 'A Totre' 'MV Saidharshan Kumar'
 'AG Wharf' 'Abhijit Bhattacharya' 'A Bengeri' 'AT Holdstock'
 'K Swaroopanand' 'P Joshi' 'M Krishnadas' 'K Kelkar' 'KM Gandhi'
 'Anish Sahasrabudhe']
[False  True]
In [51]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
match_teams = df[['match_id', 'batting_team']].drop_duplicates()
matches_played = match_teams['batting_team'].value_counts().reset_index()
matches_played.columns = ['Team', 'Total_Matches']
matches = df.drop_duplicates(subset=['match_id'])
matches_won = matches['match_won_by'].value_counts().reset_index()
matches_won.columns = ['Team', 'Wins']
team_stats = pd.merge(matches_played, matches_won, on='Team', how='left')
team_stats['Wins'] = team_stats['Wins'].fillna(0).astype(int)
team_stats = team_stats.sort_values(by='Total_Matches', ascending=False).reset_index(drop=True)

print(team_stats)
                           Team  Total_Matches  Wins
0                Mumbai Indians            277   151
1         Kolkata Knight Riders            264   135
2           Chennai Super Kings            251   142
3   Royal Challengers Bangalore            240   114
4              Rajasthan Royals            234   114
5           Sunrisers Hyderabad            195    93
6               Kings XI Punjab            190    85
7              Delhi Daredevils            161    67
8                Delhi Capitals            105    51
9               Deccan Chargers             75    29
10                 Punjab Kings             74    34
11               Gujarat Titans             60    37
12         Lucknow Super Giants             58    30
13                Pune Warriors             45    12
14                Gujarat Lions             30    13
15  Royal Challengers Bengaluru             30    18
16       Rising Pune Supergiant             16    10
17      Rising Pune Supergiants             14     5
18         Kochi Tuskers Kerala             14     6
In [52]:
plt.figure(figsize=(14, 8))
bar_width = 0.4
x = range(len(team_stats))

plt.bar(x, team_stats['Total_Matches'], width=bar_width, label='Total Matches', color='steelblue')
plt.bar([p + bar_width for p in x], team_stats['Wins'], width=bar_width, label='Wins', color='seagreen')
plt.xticks([p + bar_width / 2 for p in x], team_stats['Team'], rotation=60, ha='right')
plt.ylabel("Count")
plt.title("IPL Match Statistics: Total Matches Played vs Wins (All Teams)")
plt.legend()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [53]:
team_stats['Win%'] = (team_stats['Wins'] / team_stats['Total_Matches'] * 100).round(2)
team_stats = team_stats.sort_values(by='Win%', ascending=False)
print(team_stats[['Team', 'Total_Matches', 'Wins', 'Win%']])
                           Team  Total_Matches  Wins   Win%
16       Rising Pune Supergiant             16    10  62.50
11               Gujarat Titans             60    37  61.67
15  Royal Challengers Bengaluru             30    18  60.00
2           Chennai Super Kings            251   142  56.57
0                Mumbai Indians            277   151  54.51
12         Lucknow Super Giants             58    30  51.72
1         Kolkata Knight Riders            264   135  51.14
4              Rajasthan Royals            234   114  48.72
8                Delhi Capitals            105    51  48.57
5           Sunrisers Hyderabad            195    93  47.69
3   Royal Challengers Bangalore            240   114  47.50
10                 Punjab Kings             74    34  45.95
6               Kings XI Punjab            190    85  44.74
14                Gujarat Lions             30    13  43.33
18         Kochi Tuskers Kerala             14     6  42.86
7              Delhi Daredevils            161    67  41.61
9               Deccan Chargers             75    29  38.67
17      Rising Pune Supergiants             14     5  35.71
13                Pune Warriors             45    12  26.67
In [54]:
team_stats_sorted = team_stats.sort_values(by='Win%', ascending=True)
plt.figure(figsize=(12, 8))
sns.barplot(data=team_stats_sorted, x='Win%', y='Team', palette='coolwarm')

plt.title('IPL Teams by Win Percentage (All Seasons)', fontsize=16)
plt.xlabel('Win Percentage (%)')
plt.ylabel('Team')
plt.xlim(0, 100)  # since it's a percentage
plt.tight_layout()
plt.show()
No description has been provided for this image
In [55]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
match_df = df.drop_duplicates(subset='match_id')
toss_wins = match_df['toss_winner'].value_counts().reset_index()
toss_wins.columns = ['Team', 'Toss_Wins']
toss_match_win = match_df[match_df['toss_winner'] == match_df['match_won_by']]
toss_match_wins = toss_match_win['toss_winner'].value_counts().reset_index()
toss_match_wins.columns = ['Team', 'Toss_Match_Wins']
toss_analysis = pd.merge(toss_wins, toss_match_wins, on='Team', how='left')
toss_analysis['Toss_Match_Wins'] = toss_analysis['Toss_Match_Wins'].fillna(0).astype(int)
toss_analysis['Toss_Success_%'] = (toss_analysis['Toss_Match_Wins'] / toss_analysis['Toss_Wins'] * 100).round(2)
toss_analysis = toss_analysis.sort_values(by='Toss_Success_%', ascending=False).reset_index(drop=True)
print(toss_analysis)
                           Team  Toss_Wins  Toss_Match_Wins  Toss_Success_%
0        Rising Pune Supergiant          6                5           83.33
1   Royal Challengers Bengaluru         15               10           66.67
2                 Gujarat Lions         15               10           66.67
3                Gujarat Titans         29               19           65.52
4           Chennai Super Kings        128               78           60.94
5         Kolkata Knight Riders        128               71           55.47
6                Mumbai Indians        151               82           54.30
7          Lucknow Super Giants         24               13           54.17
8                Delhi Capitals         58               29           50.00
9          Kochi Tuskers Kerala          8                4           50.00
10  Royal Challengers Bangalore        113               56           49.56
11             Rajasthan Royals        127               61           48.03
12                 Punjab Kings         37               17           45.95
13              Deccan Chargers         43               19           44.19
14             Delhi Daredevils         80               35           43.75
15          Sunrisers Hyderabad         95               41           43.16
16      Rising Pune Supergiants          7                3           42.86
17              Kings XI Punjab         85               35           41.18
18                Pune Warriors         20                3           15.00
In [56]:
toss_plot = toss_analysis.sort_values(by='Toss_Success_%', ascending=True)
plt.figure(figsize=(12, 8))
sns.barplot(data=toss_plot, x='Toss_Success_%', y='Team', palette='plasma')

plt.title('Toss to Match Win Success % by IPL Team', fontsize=16)
plt.xlabel('Toss Success %')
plt.ylabel('Team')
plt.xlim(0, 100)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [57]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

match_df = df.drop_duplicates(subset='match_id')
matches_played = pd.concat([
    match_df[['match_id', 'toss_winner']].rename(columns={'toss_winner': 'Team'}),
    match_df[['match_id', 'match_won_by']].rename(columns={'match_won_by': 'Team'})
])
matches_played = matches_played.dropna()
total_matches = matches_played['Team'].value_counts().reset_index()
total_matches.columns = ['Team', 'Total_Matches']
toss_wins = match_df['toss_winner'].value_counts().reset_index()
toss_wins.columns = ['Team', 'Toss_Win']

toss_and_match_win = match_df[match_df['toss_winner'] == match_df['match_won_by']]
toss_match_win = toss_and_match_win['toss_winner'].value_counts().reset_index()
toss_match_win.columns = ['Team', 'Toss+Match_Win']
bat_first_win = match_df[
    (match_df['toss_decision'] == 'bat') &
    (match_df['toss_winner'] == match_df['match_won_by'])
]
bat_first_win_count = bat_first_win['toss_winner'].value_counts().reset_index()
bat_first_win_count.columns = ['Team', 'Bat_First_Win_After_Toss']
chase_win = match_df[
    (match_df['toss_decision'] == 'field') &
    (match_df['toss_winner'] == match_df['match_won_by'])
]
chase_win_count = chase_win['toss_winner'].value_counts().reset_index()
chase_win_count.columns = ['Team', 'Chasing_Win_After_Toss']
result = total_matches.merge(toss_wins, on='Team', how='left') \
                      .merge(toss_match_win, on='Team', how='left') \
                      .merge(bat_first_win_count, on='Team', how='left') \
                      .merge(chase_win_count, on='Team', how='left')
result = result.fillna(0).astype({'Toss_Win': 'int', 'Toss+Match_Win': 'int',
                                  'Bat_First_Win_After_Toss': 'int',
                                  'Chasing_Win_After_Toss': 'int'})
result = result.sort_values(by='Total_Matches', ascending=False).reset_index(drop=True)
print(result)
                           Team  Total_Matches  Toss_Win  Toss+Match_Win  \
0                Mumbai Indians            302       151              82   
1           Chennai Super Kings            270       128              78   
2         Kolkata Knight Riders            263       128              71   
3              Rajasthan Royals            241       127              61   
4   Royal Challengers Bangalore            227       113              56   
5           Sunrisers Hyderabad            188        95              41   
6               Kings XI Punjab            170        85              35   
7              Delhi Daredevils            147        80              35   
8                Delhi Capitals            109        58              29   
9               Deccan Chargers             72        43              19   
10                 Punjab Kings             71        37              17   
11               Gujarat Titans             66        29              19   
12         Lucknow Super Giants             54        24              13   
13  Royal Challengers Bengaluru             33        15              10   
14                Pune Warriors             32        20               3   
15                Gujarat Lions             28        15              10   
16                      Unknown             23         0               0   
17       Rising Pune Supergiant             16         6               5   
18         Kochi Tuskers Kerala             14         8               4   
19      Rising Pune Supergiants             12         7               3   

    Bat_First_Win_After_Toss  Chasing_Win_After_Toss  
0                         30                      52  
1                         36                      42  
2                         21                      50  
3                         17                      44  
4                         16                      40  
5                         13                      28  
6                          6                      29  
7                         11                      24  
8                          7                      22  
9                         11                       8  
10                         3                      14  
11                         3                      16  
12                         5                       8  
13                         1                       9  
14                         3                       0  
15                         0                      10  
16                         0                       0  
17                         0                       5  
18                         0                       4  
19                         0                       3  
In [58]:
plot_data = result.head(10)
teams = plot_data['Team']
x = np.arange(len(teams))
width = 0.2

plt.figure(figsize=(14, 7))
plt.bar(x - width*1.5, plot_data['Toss_Win'], width, label='Toss Wins', color='skyblue')
plt.bar(x - width/2, plot_data['Toss+Match_Win'], width, label='Toss + Match Wins', color='limegreen')
plt.bar(x + width/2, plot_data['Bat_First_Win_After_Toss'], width, label='Bat First Wins', color='orange')
plt.bar(x + width*1.5, plot_data['Chasing_Win_After_Toss'], width, label='Chasing Wins', color='plum')

plt.xticks(x, teams, rotation=45, ha='right')
plt.ylabel('Match Count')
plt.title('IPL Team Strategy: Toss & Match Outcome Breakdown')
plt.legend()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [59]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

team_scores = df.groupby(['match_id', 'batting_team'])['runs_total'].sum().reset_index()

highest_scores = team_scores.groupby('batting_team')['runs_total'].max().reset_index()
highest_scores.columns = ['Team', 'Highest_Score']

highest_scores = highest_scores.sort_values(by='Highest_Score', ascending=False).reset_index(drop=True)
print(highest_scores)
                           Team  Highest_Score
0           Sunrisers Hyderabad            287
1         Kolkata Knight Riders            272
2   Royal Challengers Bangalore            263
3   Royal Challengers Bengaluru            262
4                  Punjab Kings            262
5          Lucknow Super Giants            257
6                Delhi Capitals            257
7                Mumbai Indians            247
8           Chennai Super Kings            246
9              Rajasthan Royals            242
10               Gujarat Titans            233
11              Kings XI Punjab            232
12             Delhi Daredevils            231
13              Deccan Chargers            214
14                Gujarat Lions            208
15      Rising Pune Supergiants            195
16                Pune Warriors            192
17       Rising Pune Supergiant            187
18         Kochi Tuskers Kerala            184
In [60]:
plot_scores = highest_scores.sort_values(by='Highest_Score', ascending=True)

plt.figure(figsize=(12, 8))
sns.barplot(data=plot_scores, x='Highest_Score', y='Team', palette='crest')

plt.title('Highest Run Score by Each IPL Team (All Seasons)', fontsize=16)
plt.xlabel('Highest Score (Runs)')
plt.ylabel('Team')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [61]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

innings_score = df.groupby(['match_id', 'batting_team'])['runs_total'].sum().reset_index()
innings_score.columns = ['match_id', 'Team', 'Total_Runs']
high_scores = innings_score[innings_score['Total_Runs'] >= 200]
team_200plus = high_scores['Team'].value_counts().reset_index()
team_200plus.columns = ['Team', '200+ Scores']
team_200plus = team_200plus.sort_values(by='200+ Scores', ascending=False).reset_index(drop=True)

print(team_200plus)
                           Team  200+ Scores
0           Chennai Super Kings           35
1                Mumbai Indians           31
2         Kolkata Knight Riders           29
3              Rajasthan Royals           26
4           Sunrisers Hyderabad           26
5   Royal Challengers Bangalore           24
6                  Punjab Kings           18
7                Gujarat Titans           16
8               Kings XI Punjab           15
9                Delhi Capitals           15
10         Lucknow Super Giants           13
11  Royal Challengers Bengaluru           10
12             Delhi Daredevils            5
13              Deccan Chargers            1
14                Gujarat Lions            1
In [62]:
plot_200 = team_200plus.sort_values(by='200+ Scores', ascending=True)

plt.figure(figsize=(12, 8))
sns.barplot(data=plot_200, x='200+ Scores', y='Team', palette='rocket')

plt.title('Number of 200+ Runs Innings by IPL Team', fontsize=16)
plt.xlabel('200+ Run Innings Count')
plt.ylabel('Team')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [63]:
f = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
df['season'] = df['season'].astype(str)
df['season'] = df['season'].replace({
    '2007/08': '2008',
    '2009/10': '2010',
    '2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})')  # captures '2008' to '2099'
df['season'] = df['season'].astype(int)
innings_runs = df.groupby(['match_id', 'batting_team', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Season', 'Total_Runs']
high_scoring = innings_runs[innings_runs['Total_Runs'] >= 200]
season_200plus = high_scoring.groupby('Season')['match_id'].nunique().reset_index()
season_200plus.columns = ['Season', '200+ Run Innings']
season_200plus = season_200plus.sort_values(by='Season').reset_index(drop=True)
print(season_200plus)
    Season  200+ Run Innings
0     2008                 7
1     2009                 1
2     2010                 5
3     2011                 4
4     2012                 4
5     2013                 4
6     2014                 6
7     2015                 8
8     2016                 5
9     2017                 8
10    2018                11
11    2019                 9
12    2021                15
13    2022                13
14    2023                25
15    2024                27
16    2025                35
In [64]:
plt.figure(figsize=(12, 6))
sns.set_style("whitegrid")
sns.lineplot(data=season_200plus, x='Season', y='200+ Run Innings', marker='o', color='crimson', linewidth=2.5)
plt.title('🔥 Trend of 200+ Run Innings in IPL (2008–2025)', fontsize=16)
plt.xlabel('Season')
plt.ylabel('Number of 200+ Scores')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [65]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

df['season'] = df['season'].astype(str)
df['season'] = df['season'].replace({
    '2007/08': '2008',
    '2009/10': '2010',
    '2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})')
df['season'] = df['season'].astype(int)
innings_runs = df.groupby(['match_id', 'batting_team', 'bowling_team', 'venue', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Opponent', 'Venue', 'Season', 'Total_Runs']
highest_each_season = innings_runs.loc[innings_runs.groupby('Season')['Total_Runs'].idxmax()].sort_values(by='Season').reset_index(drop=True)
highest_each_season = highest_each_season.rename(columns={'Total_Runs': 'Highest_Score'})
print(highest_each_season[['Season', 'Team', 'Opponent', 'Venue', 'Highest_Score']])
    Season                         Team                     Opponent  \
0     2008          Chennai Super Kings              Kings XI Punjab   
1     2009             Rajasthan Royals              Kings XI Punjab   
2     2010          Chennai Super Kings             Rajasthan Royals   
3     2011              Kings XI Punjab  Royal Challengers Bangalore   
4     2012          Chennai Super Kings             Delhi Daredevils   
5     2013  Royal Challengers Bangalore                Pune Warriors   
6     2014              Kings XI Punjab          Chennai Super Kings   
7     2015  Royal Challengers Bangalore               Mumbai Indians   
8     2016  Royal Challengers Bangalore                Gujarat Lions   
9     2017              Kings XI Punjab               Mumbai Indians   
10    2018        Kolkata Knight Riders              Kings XI Punjab   
11    2019        Kolkata Knight Riders               Mumbai Indians   
12    2021               Mumbai Indians          Sunrisers Hyderabad   
13    2022             Rajasthan Royals               Delhi Capitals   
14    2023         Lucknow Super Giants                 Punjab Kings   
15    2024          Sunrisers Hyderabad  Royal Challengers Bengaluru   
16    2025          Sunrisers Hyderabad             Rajasthan Royals   

                                                Venue  Highest_Score  
0          Punjab Cricket Association Stadium, Mohali            240  
1                                           Kingsmead            211  
2                     MA Chidambaram Stadium, Chepauk            246  
3        Himachal Pradesh Cricket Association Stadium            232  
4                     MA Chidambaram Stadium, Chepauk            222  
5                               M Chinnaswamy Stadium            263  
6                                    Barabati Stadium            231  
7                                    Wankhede Stadium            235  
8                               M Chinnaswamy Stadium            248  
9                                    Wankhede Stadium            230  
10                             Holkar Cricket Stadium            245  
11                                       Eden Gardens            232  
12                   Zayed Cricket Stadium, Abu Dhabi            235  
13                           Wankhede Stadium, Mumbai            222  
14  Punjab Cricket Association IS Bindra Stadium, ...            257  
15                   M Chinnaswamy Stadium, Bengaluru            287  
16  Rajiv Gandhi International Stadium, Uppal, Hyd...            286  
In [66]:
plt.figure(figsize=(12, 6))
sns.set_style("whitegrid")
sns.lineplot(data=highest_each_season, x='Season', y='Highest_Score', marker='o', linewidth=2.5, color='purple')

plt.title('🔥 Highest IPL Team Scores Per Season (2008–2024)', fontsize=16)
plt.xlabel('Season')
plt.ylabel('Highest Score')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [67]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

df['season'] = df['season'].astype(str)
df['season'] = df['season'].replace({
    '2007/08': '2008',
    '2009/10': '2010',
    '2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})')
df['season'] = df['season'].astype(int)

innings_runs = df.groupby(['match_id', 'batting_team', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Season', 'Total_Runs']

high_scores = innings_runs[innings_runs['Total_Runs'] >= 200]

team_season_200plus = high_scores.groupby(['Team', 'Season'])['match_id'].nunique().reset_index()
team_season_200plus.columns = ['Team', 'Season', '200+ Scores']

pivot_table = team_season_200plus.pivot(index='Team', columns='Season', values='200+ Scores').fillna(0).astype(int)

pivot_table['Total_200+'] = pivot_table.sum(axis=1)
pivot_table = pivot_table.sort_values(by='Total_200+', ascending=False)

print(pivot_table.drop(columns='Total_200+'))
Season                       2008  2009  2010  2011  2012  2013  2014  2015  \
Team                                                                          
Chennai Super Kings             3     0     1     1     2     2     2     1   
Mumbai Indians                  1     0     2     0     0     1     0     2   
Kolkata Knight Riders           2     0     1     0     0     0     1     0   
Rajasthan Royals                2     1     2     0     0     0     1     0   
Sunrisers Hyderabad             0     0     0     0     0     0     1     1   
Royal Challengers Bangalore     0     0     1     1     2     1     0     3   
Punjab Kings                    0     0     0     0     0     0     0     0   
Gujarat Titans                  0     0     0     0     0     0     0     0   
Delhi Capitals                  0     0     0     0     0     0     0     0   
Kings XI Punjab                 2     0     2     2     0     0     4     1   
Lucknow Super Giants            0     0     0     0     0     0     0     0   
Royal Challengers Bengaluru     0     0     0     0     0     0     0     0   
Delhi Daredevils                0     0     0     1     1     0     0     0   
Deccan Chargers                 1     0     0     0     0     0     0     0   
Gujarat Lions                   0     0     0     0     0     0     0     0   

Season                       2016  2017  2018  2019  2021  2022  2023  2024  \
Team                                                                          
Chennai Super Kings             0     0     4     0     3     4     5     4   
Mumbai Indians                  1     2     2     0     5     0     6     3   
Kolkata Knight Riders           0     0     3     4     2     2     4     6   
Rajasthan Royals                0     0     1     0     4     3     4     3   
Sunrisers Hyderabad             1     3     1     3     2     0     3     6   
Royal Challengers Bangalore     4     1     2     3     2     2     2     0   
Punjab Kings                    0     0     0     0     1     2     4     3   
Gujarat Titans                  0     0     0     0     0     0     5     3   
Delhi Capitals                  0     0     0     1     1     3     1     5   
Kings XI Punjab                 0     1     1     0     2     0     0     0   
Lucknow Super Giants            0     0     0     0     0     2     3     2   
Royal Challengers Bengaluru     0     0     0     0     0     0     0     6   
Delhi Daredevils                0     2     1     0     0     0     0     0   
Deccan Chargers                 0     0     0     0     0     0     0     0   
Gujarat Lions                   0     1     0     0     0     0     0     0   

Season                       2025  
Team                               
Chennai Super Kings             3  
Mumbai Indians                  6  
Kolkata Knight Riders           4  
Rajasthan Royals                5  
Sunrisers Hyderabad             5  
Royal Challengers Bangalore     0  
Punjab Kings                    8  
Gujarat Titans                  8  
Delhi Capitals                  4  
Kings XI Punjab                 0  
Lucknow Super Giants            6  
Royal Challengers Bengaluru     4  
Delhi Daredevils                0  
Deccan Chargers                 0  
Gujarat Lions                   0  
In [68]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

df['season'] = df['season'].astype(str).replace({
    '2007/08': '2008',
    '2009/10': '2010',
    '2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})')
df['season'] = df['season'].astype(int)

innings_runs = df.groupby(['match_id', 'batting_team', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Season', 'Total_Runs']

high_scores = innings_runs[innings_runs['Total_Runs'] >= 200]

team_season_200plus = high_scores.groupby(['Team', 'Season'])['match_id'].nunique().reset_index()
team_season_200plus.columns = ['Team', 'Season', '200+ Scores']
all_seasons = list(range(2008, 2026))
teams = team_season_200plus['Team'].unique()

for team in sorted(teams):
    team_data = team_season_200plus[team_season_200plus['Team'] == team]
    team_dict = dict(zip(team_data['Season'], team_data['200+ Scores']))
    scores_by_year = [team_dict.get(year, 0) for year in all_seasons]

    plt.figure(figsize=(10, 4))
    sns.lineplot(x=all_seasons, y=scores_by_year, marker='o', color='green', linewidth=2.5)
    plt.title(f'📊 200+ Run Innings by {team} (2008–2025)', fontsize=14)
    plt.xlabel('Season')
    plt.ylabel('No. of 200+ Scores')
    plt.xticks(all_seasons, rotation=45)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [69]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

death_overs = df[(df['over'] >= 16) & (df['over'] <= 20) & (df['valid_ball'] == 1)]

death_runs = death_overs.groupby('batting_team')['runs_total'].sum().reset_index()
death_runs.columns = ['Team', 'Death_Over_Runs']

death_runs = death_runs.sort_values(by='Death_Over_Runs', ascending=False).reset_index(drop=True)

print(death_runs)
                           Team  Death_Over_Runs
0                Mumbai Indians             9710
1           Chennai Super Kings             9143
2   Royal Challengers Bangalore             8085
3         Kolkata Knight Riders             8033
4              Rajasthan Royals             7390
5           Sunrisers Hyderabad             6421
6               Kings XI Punjab             6029
7              Delhi Daredevils             4857
8                Delhi Capitals             3463
9                  Punjab Kings             2524
10              Deccan Chargers             2440
11               Gujarat Titans             2262
12         Lucknow Super Giants             2132
13                Pune Warriors             1303
14  Royal Challengers Bengaluru             1027
15                Gujarat Lions              881
16       Rising Pune Supergiant              533
17      Rising Pune Supergiants              427
18         Kochi Tuskers Kerala              324
In [70]:
plt.figure(figsize=(12, 6))
sns.barplot(data=death_runs, x='Death_Over_Runs', y='Team', palette='flare')
plt.title('Total Runs Scored by Teams in Death Overs (16–20)', fontsize=16)
plt.xlabel('Total Runs in Overs 16–20')
plt.ylabel('Team')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [71]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

death_df = df[(df['over'] >= 16) & (df['over'] <= 20) & (df['valid_ball'] == 1)]
death_summary = death_df.groupby('batting_team').agg(
    death_over_runs=('runs_total', 'sum'),
    balls_faced=('valid_ball', 'count'),
    wickets_lost=('player_out', 'count')
).reset_index()

death_summary['strike_rate'] = (death_summary['death_over_runs'] / death_summary['balls_faced'] * 100).round(2)
death_summary = death_summary.sort_values(by='strike_rate', ascending=False).reset_index(drop=True)

print(death_summary)
                   batting_team  death_over_runs  balls_faced  wickets_lost  \
0   Royal Challengers Bengaluru             1027          556            61   
1                Gujarat Titans             2262         1264           115   
2       Rising Pune Supergiants              427          245            24   
3                  Punjab Kings             2524         1464           126   
4           Chennai Super Kings             9143         5323           403   
5          Lucknow Super Giants             2132         1247           104   
6   Royal Challengers Bangalore             8085         4731           418   
7                Mumbai Indians             9710         5685           535   
8         Kolkata Knight Riders             8033         4942           463   
9                Delhi Capitals             3463         2135           187   
10       Rising Pune Supergiant              533          331            32   
11             Delhi Daredevils             4857         3019           256   
12          Sunrisers Hyderabad             6421         4045           392   
13             Rajasthan Royals             7390         4682           430   
14              Kings XI Punjab             6029         3908           364   
15              Deccan Chargers             2440         1600           180   
16                Gujarat Lions              881          581            61   
17         Kochi Tuskers Kerala              324          230            29   
18                Pune Warriors             1303          972           103   

    strike_rate  
0        184.71  
1        178.96  
2        174.29  
3        172.40  
4        171.76  
5        170.97  
6        170.89  
7        170.80  
8        162.55  
9        162.20  
10       161.03  
11       160.88  
12       158.74  
13       157.84  
14       154.27  
15       152.50  
16       151.64  
17       140.87  
18       134.05  
In [72]:
plt.figure(figsize=(12, 6))
sns.barplot(data=death_summary, x='strike_rate', y='batting_team', palette='rocket')
plt.title('Death Over Strike Rate (Overs 16–20) by Team', fontsize=16)
plt.xlabel('Strike Rate')
plt.ylabel('Team')
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [73]:
print("\033[1mPLAYERS ANALYSIS : BATSMAN\033[0m")
PLAYERS ANALYSIS : BATSMAN
In [74]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
pom_df = df.drop_duplicates(subset='match_id')[['match_id', 'player_of_match']]
pom_df = pom_df.dropna()
pom_counts = pom_df['player_of_match'].value_counts().reset_index()
pom_counts.columns = ['Player', 'POM_Awards']

top_10_pom = pom_counts.head(10)
print(top_10_pom)
           Player  POM_Awards
0  AB de Villiers          25
1        CH Gayle          22
2       RG Sharma          21
3         V Kohli          19
4        MS Dhoni          18
5       DA Warner          18
6       SP Narine          17
7      AD Russell          16
8       SR Watson          16
9       YK Pathan          16
In [75]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_10_pom, x='POM_Awards', y='Player', palette='viridis')
plt.title('Players with Most Player of the Match Awards in IPL', fontsize=16)
plt.xlabel('No. of Awards')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [76]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
top_scorers = df.groupby('batter')['runs_batter'].sum().reset_index()
top_scorers.columns = ['Player', 'Total_Runs']
top_scorers = top_scorers.sort_values(by='Total_Runs', ascending=False).head(10).reset_index(drop=True)
print(top_scorers)
           Player  Total_Runs
0         V Kohli        8671
1       RG Sharma        7048
2        S Dhawan        6769
3       DA Warner        6567
4        SK Raina        5536
5        MS Dhoni        5439
6        KL Rahul        5235
7  AB de Villiers        5181
8       AM Rahane        5032
9        CH Gayle        4997
In [77]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_scorers, x='Total_Runs', y='Player', palette='coolwarm')
plt.title('Top 10 Run Scorers in IPL History', fontsize=16)
plt.xlabel('Total Runs')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [78]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

sixes_df = df[df['runs_batter'] == 6]
sixes_count = sixes_df.groupby('batter')['runs_batter'].count().reset_index()
sixes_count.columns = ['Player', 'Sixes']

top_six_hitters = sixes_count.sort_values(by='Sixes', ascending=False).head(10).reset_index(drop=True)
print(top_six_hitters)
           Player  Sixes
0        CH Gayle    359
1       RG Sharma    303
2         V Kohli    292
3        MS Dhoni    264
4  AB de Villiers    253
5       DA Warner    236
6      KA Pollard    224
7      AD Russell    223
8       SV Samson    219
9        KL Rahul    208
In [79]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_six_hitters, x='Sixes', y='Player', palette='magma')
plt.title('Top 10 Players with Most Sixes in IPL History', fontsize=16)
plt.xlabel('Number of Sixes')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [80]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
fours_df = df[df['runs_batter'] == 4]
fours_count = fours_df.groupby('batter')['runs_batter'].count().reset_index()
fours_count.columns = ['Player', 'Fours']
top_four_hitters = fours_count.sort_values(by='Fours', ascending=False).head(10).reset_index(drop=True)
print(top_four_hitters)
       Player  Fours
0     V Kohli    774
1    S Dhawan    768
2   DA Warner    663
3   RG Sharma    640
4   AM Rahane    515
5    SK Raina    506
6   G Gambhir    492
7  RV Uthappa    481
8  KD Karthik    466
9    SA Yadav    454
In [81]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_four_hitters, x='Fours', y='Player', palette='cubehelix')
plt.title('Top 10 Players with Most Fours in IPL History', fontsize=16)
plt.xlabel('Number of Fours')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [82]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()

threes_df = df[df['runs_batter'] == 3]
threes_count = threes_df.groupby('batter')['runs_batter'].count().reset_index()
threes_count.columns = ['Player', 'Threes']
top_three_hitters = threes_count.sort_values(by='Threes', ascending=False).head(10).reset_index(drop=True)
print(top_three_hitters)
           Player  Threes
0       DA Warner      24
1        S Dhawan      23
2         V Kohli      21
3       AM Rahane      19
4    F du Plessis      18
5         M Vijay      17
6  AB de Villiers      17
7       SV Samson      15
8      RV Uthappa      15
9        MS Dhoni      15
In [83]:
plt.figure(figsize=(10, 6))
sns.barplot(data=top_three_hitters, x='Threes', y='Player', palette='crest')
plt.title('Top 10 Players with Most Threes in IPL History', fontsize=16)
plt.xlabel('Number of Threes')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [84]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
matches_played = df.groupby('batter')['match_id'].nunique().reset_index()
matches_played.columns = ['Player', 'Matches']

total_runs = df.groupby('batter')['runs_batter'].sum().reset_index()
total_runs.columns = ['Player', 'Total_Runs']
outs = df[df['player_out'].notna()]
times_out = outs.groupby('player_out')['match_id'].count().reset_index()
times_out.columns = ['Player', 'Times_Out']

batting_stats = matches_played.merge(total_runs, on='Player', how='inner') \
                              .merge(times_out, on='Player', how='left')
batting_stats['Times_Out'] = batting_stats['Times_Out'].fillna(0)
batting_stats['Average'] = batting_stats.apply(
    lambda row: row['Total_Runs'] / row['Times_Out'] if row['Times_Out'] > 0 else None,
    axis=1
)
batting_stats_25plus = batting_stats[batting_stats['Matches'] >= 25]
batting_stats_25plus = batting_stats_25plus.sort_values(by='Average', ascending=False).reset_index(drop=True)
print(batting_stats_25plus[['Player', 'Matches', 'Total_Runs', 'Times_Out', 'Average']].head(20))
             Player  Matches  Total_Runs  Times_Out    Average
0   B Sai Sudharsan       40        1793       36.0  49.805556
1          KL Rahul      135        5235      115.0  45.521739
2         DP Conway       28        1080       25.0  43.200000
3          T Stubbs       30         711       17.0  41.823529
4           C Green       28         707       17.0  41.588235
5    Shashank Singh       33         773       19.0  40.684211
6        RD Gaikwad       70        2502       62.0  40.354839
7         DA Warner      184        6567      164.0  40.042683
8         H Klaasen       45        1480       37.0  40.000000
9       LMP Simmons       29        1079       27.0  39.962963
10   AB de Villiers      170        5181      130.0  39.853846
11        JP Duminy       75        2029       51.0  39.784314
12         CH Gayle      141        4997      126.0  39.658730
13       JC Buttler      119        4121      104.0  39.625000
14          V Kohli      259        8671      219.0  39.593607
15         SE Marsh       69        2489       63.0  39.507937
16     Shubman Gill      114        3866       98.0  39.448980
17       MEK Hussey       58        1977       51.0  38.764706
18         MS Dhoni      241        5439      142.0  38.302817
19      Tilak Varma       51        1499       40.0  37.475000
In [85]:
top_avg = batting_stats_25plus.sort_values(by='Average', ascending=False).head(20)

plt.figure(figsize=(12, 6))
sns.barplot(data=top_avg, x='Average', y='Player', palette='plasma')
plt.title('Top 20 IPL Batting Averages (Min 25 Matches)', fontsize=16)
plt.xlabel('Batting Average')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [86]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
death_overs_df = df[(df['over'] >= 16) & (df['over'] <= 20)]
death_batting = death_overs_df.groupby('batter').agg({
    'runs_batter': 'sum',
    'balls_faced': 'sum'
}).reset_index()

death_batting = death_batting[death_batting['balls_faced'] > 0]
death_batting['Strike_Rate'] = (death_batting['runs_batter'] / death_batting['balls_faced']) * 100
qualified_finishers = death_batting[death_batting['balls_faced'] >= 100]
top_finishers = qualified_finishers.sort_values(by='Strike_Rate', ascending=False).head(10).reset_index(drop=True)

print(top_finishers[['batter', 'runs_batter', 'balls_faced', 'Strike_Rate']])
           batter  runs_batter  balls_faced  Strike_Rate
0        T Stubbs          370          152   243.421053
1  AB de Villiers         1421          611   232.569558
2  LS Livingstone          241          107   225.233645
3      Naman Dhir          222          103   215.533981
4  Shashank Singh          360          172   209.302326
5       H Klaasen          434          209   207.655502
6        CH Gayle          404          196   206.122449
7      AD Russell         1134          552   205.434783
8         RR Pant          696          339   205.309735
9        TH David          578          283   204.240283
In [87]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_finishers, x='Strike_Rate', y='batter', palette='rocket')
plt.title('Top 10 Batsmen by Strike Rate in Death Overs (Min 100 Balls)', fontsize=16)
plt.xlabel('Strike Rate')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [88]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
powerplay_df = df[(df['over'] >= 1) & (df['over'] <= 6)]
pp_batting = powerplay_df.groupby('batter').agg({
    'runs_batter': 'sum',
    'balls_faced': 'sum'
}).reset_index()

pp_batting = pp_batting[pp_batting['balls_faced'] > 0]
pp_batting['Strike_Rate'] = (pp_batting['runs_batter'] / pp_batting['balls_faced']) * 100
qualified_openers = pp_batting[pp_batting['balls_faced'] >= 100]

top_pp_hitters = qualified_openers.sort_values(by='Strike_Rate', ascending=False).head(10).reset_index(drop=True)
print(top_pp_hitters[['batter', 'runs_batter', 'balls_faced', 'Strike_Rate']])
            batter  runs_batter  balls_faced  Strike_Rate
0  J Fraser-McGurk          266          108   246.296296
1    Priyansh Arya          318          164   193.902439
2          TM Head          632          330   191.515152
3          PD Salt          641          343   186.880466
4  Abhishek Sharma         1026          563   182.238011
5        SP Narine         1031          584   176.541096
6         N Pooran          263          155   169.677419
7      YBK Jaiswal         1183          738   160.298103
8     RD Rickelton          268          168   159.523810
9    A Raghuvanshi          169          108   156.481481
In [89]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_pp_hitters, x='Strike_Rate', y='batter', palette='coolwarm')
plt.title('Top 10 Batsmen by Strike Rate in Powerplay (Overs 1–6)', fontsize=16)
plt.xlabel('Strike Rate')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [90]:
centuries = df.groupby(['match_id', 'batter']).agg({'runs_batter': 'sum', 'balls_faced': 'sum'}).reset_index()
centuries = centuries[centuries['runs_batter'] >= 100]
fastest_centuries = centuries.sort_values(by='balls_faced').head(10)
fastest_centuries = fastest_centuries.rename(columns={'runs_batter': 'Runs', 'balls_faced': 'Balls'})
print(fastest_centuries)
       match_id         batter  Runs  Balls
1786     419107      YK Pathan   100     37
5840     598064      DA Miller   101     38
17231   1473484  V Suryavanshi   101     38
17524   1473505      H Klaasen   105     39
16085   1426283       WG Jacks   100     41
15856   1426268        TM Head   102     41
16864   1473459  Priyansh Arya   103     42
15310   1359543        C Green   100     47
16535   1473439   Ishan Kishan   106     47
184      335994   AC Gilchrist   109     47
In [91]:
fifties = df.groupby(['match_id', 'batter']).agg({'runs_batter': 'sum', 'balls_faced': 'sum'}).reset_index()
fifties = fifties[(fifties['runs_batter'] >= 50) & (fifties['runs_batter'] < 100)]
fastest_fifties = fifties.sort_values(by='balls_faced').head(10)
fastest_fifties = fastest_fifties.rename(columns={'runs_batter': 'Runs', 'balls_faced': 'Balls'})
print(fastest_fifties)
       match_id           batter  Runs  Balls
17310   1473489       R Shepherd    53     14
13288   1304060       PJ Cummins    56     15
9538    1136562         KL Rahul    51     16
8076     980947       KA Pollard    51     17
9297    1082636        SP Narine    54     17
15933   1426273  J Fraser-McGurk    65     18
9559    1136563        SP Narine    50     19
15782   1426263         SA Yadav    52     19
6082     729291        DA Miller    51     19
14469   1359489         N Pooran    62     19
In [92]:
high_scores = df.groupby(['match_id', 'batter']).agg({'runs_batter': 'sum'}).reset_index()
high_scores = high_scores.sort_values(by='runs_batter', ascending=False).head(10)
high_scores = high_scores.rename(columns={'runs_batter': 'Total_Runs'})
print(high_scores)
       match_id           batter  Total_Runs
5302     598027         CH Gayle         175
2        335982      BB McCullum         158
16934   1473464  Abhishek Sharma         141
14108   1304112        Q de Kock         140
7528     829795   AB de Villiers         133
11583   1216510         KL Rahul         132
15383   1370352     Shubman Gill         129
8359     980987   AB de Villiers         129
10149   1136602          RR Pant         128
4687     548372         CH Gayle         128
In [93]:
plt.figure(figsize=(10, 5))
sns.barplot(data=high_scores, x='batter', y='Total_Runs', palette='plasma')
plt.title('Top 10 Highest Individual Scores in an IPL Match', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [94]:
print("\033[1mPLAYERS ANALYSIS : BOWLERS\033[0m")
PLAYERS ANALYSIS : BOWLERS
In [95]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
valid_wickets = df[df['bowler_wicket'] == 1]
wicket_count = valid_wickets.groupby('bowler')['bowler_wicket'].sum().reset_index()
wicket_count.columns = ['Bowler', 'Wickets']
top_wicket_takers = wicket_count.sort_values(by='Wickets', ascending=False).head(10).reset_index(drop=True)
print(top_wicket_takers)
       Bowler  Wickets
0   YS Chahal      221
1     B Kumar      198
2   SP Narine      192
3   PP Chawla      192
4    R Ashwin      187
5   JJ Bumrah      186
6    DJ Bravo      183
7    A Mishra      174
8  SL Malinga      170
9   RA Jadeja      170
In [96]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(12, 6))
sns.barplot(data=top_wicket_takers, x='Bowler', y='Wickets', palette='mako')
plt.title('Top 10 Wicket-Takers in IPL History', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Total Wickets')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [97]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
valid_balls = df[df['valid_ball'] == 1]
economy_stats = valid_balls.groupby('bowler').agg({
    'runs_bowler': 'sum',
    'valid_ball': 'count'
}).reset_index()
economy_stats.columns = ['Bowler', 'Runs_Conceded', 'Balls_Bowled']
economy_stats = economy_stats[economy_stats['Balls_Bowled'] >= 250]
economy_stats['Overs'] = economy_stats['Balls_Bowled'] / 6
economy_stats['Economy'] = (economy_stats['Runs_Conceded'] / economy_stats['Overs']).round(2)
top_economical = economy_stats.sort_values(by='Economy').head(15).reset_index(drop=True)

print(top_economical[['Bowler', 'Overs', 'Runs_Conceded', 'Economy']])
              Bowler       Overs  Runs_Conceded  Economy
0           A Kumble  160.833333           1019     6.34
1     M Muralitharan  254.666667           1641     6.44
2         SM Pollock   46.000000            297     6.46
3         GD McGrath   54.000000            351     6.50
4   RE van der Merwe   73.833333            486     6.58
5           DW Steyn  363.666667           2393     6.58
6          SP Narine  725.166667           4835     6.67
7          R Rampaul   44.666667            298     6.67
8            J Yadav   65.000000            436     6.71
9         DL Vettori  129.500000            871     6.73
10        SL Malinga  471.166667           3176     6.74
11           J Botha  115.666667            781     6.75
12         DP Nannes  107.666667            732     6.80
13    AD Mascarenhas   51.333333            353     6.88
14      DE Bollinger   96.000000            661     6.89
In [98]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_economical, x='Bowler', y='Economy', palette='crest_r')
plt.title('Top 15 Most Economical Bowlers in IPL History', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Economy Rate')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [99]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
valid_balls = df[df['valid_ball'] == 1]
expensive_stats = valid_balls.groupby('bowler').agg({
    'runs_bowler': 'sum',
    'valid_ball': 'count'
}).reset_index()
expensive_stats.columns = ['Bowler', 'Runs_Conceded', 'Balls_Bowled']
expensive_stats = expensive_stats[expensive_stats['Balls_Bowled'] >= 250]
expensive_stats['Overs'] = expensive_stats['Balls_Bowled'] / 6
expensive_stats['Economy'] = (expensive_stats['Runs_Conceded'] / expensive_stats['Overs']).round(2)
top_expensive = expensive_stats.sort_values(by='Economy', ascending=False).head(15).reset_index(drop=True)
print(top_expensive[['Bowler', 'Overs', 'Runs_Conceded', 'Economy']])
                Bowler       Overs  Runs_Conceded  Economy
0           Akash Deep   46.333333            513    11.07
1          Arshad Khan   43.333333            464    10.71
2          CJ Anderson   49.500000            497    10.04
3     Fazalhaq Farooqi   42.333333            425    10.04
4          Yash Thakur   73.833333            736     9.97
5         Mukesh Kumar  106.000000           1056     9.96
6            G Coetzee   45.500000            445     9.78
7    Vijaykumar Vyshak   55.500000            542     9.77
8         Basil Thampi   86.833333            828     9.54
9     Mukesh Choudhary   52.500000            497     9.47
10          MP Stoinis  148.000000           1397     9.44
11           SM Curran  208.833333           1969     9.43
12        Kartik Tyagi   70.333333            662     9.41
13  Azmatullah Omarzai   48.000000            451     9.40
14             R Parag   49.166667            461     9.38
In [100]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_expensive, x='Bowler', y='Economy', palette='flare')
plt.title('Top 15 Most Expensive Bowlers in IPL (by Economy)', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Economy Rate')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [101]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
death_df = df[(df['over'] >= 16) & (df['over'] <= 20)]
death_wickets = death_df[death_df['bowler_wicket'] == 1]
death_wicket_stats = death_wickets.groupby('bowler')['bowler_wicket'].sum().reset_index()
death_wicket_stats.columns = ['Bowler', 'Wickets']
top_15_death_bowlers = death_wicket_stats.sort_values(by='Wickets', ascending=False).head(15).reset_index(drop=True)
print(top_15_death_bowlers)
            Bowler  Wickets
0         DJ Bravo      102
1          B Kumar       92
2       SL Malinga       90
3        JJ Bumrah       86
4         HV Patel       70
5        MM Sharma       61
6        SP Narine       60
7   Mohammed Shami       58
8        CH Morris       55
9         K Rabada       53
10  Sandeep Sharma       53
11        TA Boult       52
12      JD Unadkat       50
13  Arshdeep Singh       47
14      AD Russell       47
In [102]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_15_death_bowlers, x='Bowler', y='Wickets', palette='inferno')
plt.title('Top 15 Bowlers with Most Wickets in Death Overs (16–20)', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Wickets in Death Overs')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [103]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
powerplay_df = df[(df['over'] >= 1) & (df['over'] <= 6)]
powerplay_wickets = powerplay_df[powerplay_df['bowler_wicket'] == 1]
powerplay_wicket_stats = powerplay_wickets.groupby('bowler')['bowler_wicket'].sum().reset_index()
powerplay_wicket_stats.columns = ['Bowler', 'Wickets']
top_15_powerplay_bowlers = powerplay_wicket_stats.sort_values(by='Wickets', ascending=False).head(15).reset_index(drop=True)

print(top_15_powerplay_bowlers)
             Bowler  Wickets
0          I Sharma       57
1           B Kumar       53
2         DL Chahar       53
3    Sandeep Sharma       51
4          UT Yadav       49
5          R Ashwin       45
6            Z Khan       41
7    Mohammed Shami       41
8          TA Boult       40
9       DS Kulkarni       39
10  Harbhajan Singh       38
11        JJ Bumrah       38
12        MM Sharma       36
13         RP Singh       36
14         M Morkel       36
In [104]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_15_powerplay_bowlers, x='Bowler', y='Wickets', palette='magma')
plt.title('Top 15 Bowlers with Most Wickets in Powerplay (Overs 1–6)', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Wickets in Powerplay')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [105]:
print("\033[1mBowlers Vs Batsman\033[0m")
Bowlers Vs Batsman
In [106]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
kohli_dismissals = df[df['player_out'] == 'V Kohli']
kohli_out_by_bowler = kohli_dismissals.groupby('bowler')['player_out'].count().reset_index()
kohli_out_by_bowler.columns = ['Bowler', 'Dismissals']
top5_kohli_out = kohli_out_by_bowler.sort_values(by='Dismissals', ascending=False).head(5).reset_index(drop=True)

print(top5_kohli_out)
           Bowler  Dismissals
0  Sandeep Sharma           7
1         A Nehra           6
2  Mohammed Shami           5
3       JJ Bumrah           5
4     DS Kulkarni           4
In [107]:
plt.figure(figsize=(8, 5))
sns.barplot(data=top5_kohli_out, x='Bowler', y='Dismissals', palette='rocket')
plt.title('Top 5 Bowlers Who Dismissed Virat Kohli the Most', fontsize=14)
plt.xlabel('Bowler')
plt.ylabel('Number of Dismissals')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [108]:
rohit_dismissals = df[df['player_out'] == 'RG Sharma']
rohit_out_by_bowler = rohit_dismissals.groupby('bowler')['player_out'].count().reset_index()
rohit_out_by_bowler.columns = ['Bowler', 'Dismissals']

top5_rohit_out = rohit_out_by_bowler.sort_values(by='Dismissals', ascending=False).head(5).reset_index(drop=True)
print("🔹 Top 5 Bowlers Who Dismissed Rohit Sharma:")
print(top5_rohit_out)
🔹 Top 5 Bowlers Who Dismissed Rohit Sharma:
          Bowler  Dismissals
0      SP Narine           8
1       A Mishra           7
2  R Vinay Kumar           6
3       DJ Bravo           5
4     SK Trivedi           5
In [109]:
plt.figure(figsize=(8, 5))
sns.barplot(data=top5_rohit_out, x='Bowler', y='Dismissals', palette='coolwarm')
plt.title('Top 5 Bowlers Who Dismissed Rohit Sharma the Most', fontsize=14)
plt.xlabel('Bowler')
plt.ylabel('Number of Dismissals')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [110]:
dhoni_dismissals = df[df['player_out'] == 'MS Dhoni']
dhoni_out_by_bowler = dhoni_dismissals.groupby('bowler')['player_out'].count().reset_index()
dhoni_out_by_bowler.columns = ['Bowler', 'Dismissals']

top5_dhoni_out = dhoni_out_by_bowler.sort_values(by='Dismissals', ascending=False).head(5).reset_index(drop=True)
print("🔸 Top 5 Bowlers Who Dismissed MS Dhoni:")
print(top5_dhoni_out)
🔸 Top 5 Bowlers Who Dismissed MS Dhoni:
      Bowler  Dismissals
0     Z Khan           7
1    PP Ojha           7
2  JJ Bumrah           4
3   HV Patel           4
4  YS Chahal           4
In [111]:
plt.figure(figsize=(8, 5))
sns.barplot(data=top5_dhoni_out, x='Bowler', y='Dismissals', palette='plasma')
plt.title('Top 5 Bowlers Who Dismissed MS Dhoni the Most', fontsize=14)
plt.xlabel('Bowler')
plt.ylabel('Number of Dismissals')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [112]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
bumrah_df = df[df['bowler'] == 'JJ Bumrah']
runs_vs_bumrah = bumrah_df.groupby('batter')['runs_batter'].sum().reset_index()
runs_vs_bumrah.columns = ['Batter', 'Runs_Against_Bumrah']
top10_vs_bumrah = runs_vs_bumrah.sort_values(by='Runs_Against_Bumrah', ascending=False).head(10).reset_index(drop=True)

print(top10_vs_bumrah)
           Batter  Runs_Against_Bumrah
0         V Kohli                  155
1        KL Rahul                  150
2  AB de Villiers                  131
3        S Dhawan                  105
4       MK Pandey                   81
5         SS Iyer                   74
6       JP Duminy                   70
7      JC Buttler                   69
8      KD Karthik                   68
9       SPD Smith                   68
In [113]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top10_vs_bumrah, x='Batter', y='Runs_Against_Bumrah', palette='viridis')
plt.title('Top 10 Batsmen Scoring Most Runs vs Jasprit Bumrah', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs Scored')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [114]:
shami_df = df[df['bowler'] == 'Mohammed Shami']
runs_vs_shami = shami_df.groupby('batter')['runs_batter'].sum().reset_index()
runs_vs_shami.columns = ['Batter', 'Runs_Against_Sham']
top10_vs_shami = runs_vs_shami.sort_values(by='Runs_Against_Sham', ascending=False).head(10).reset_index(drop=True)
print(top10_vs_shami)
         Batter  Runs_Against_Sham
0  F du Plessis                118
1      S Dhawan                113
2     DA Warner                109
3       V Kohli                107
4    AD Russell                 99
5     AM Rahane                 94
6    JC Buttler                 86
7     RG Sharma                 84
8     SV Samson                 82
9  Shubman Gill                 76
In [115]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top10_vs_shami, x='Batter', y='Runs_Against_Sham', palette='cool')
plt.title('Top 10 Batsmen Scoring Most Runs vs Mohammed Shami', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs Scored')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [116]:
bhuvi_df = df[df['bowler'] == 'B Kumar']
runs_vs_bhuvi = bhuvi_df.groupby('batter')['runs_batter'].sum().reset_index()
runs_vs_bhuvi.columns = ['Batter', 'Runs_Against_Bhuvi']
top10_vs_bhuvi = runs_vs_bhuvi.sort_values(by='Runs_Against_Bhuvi', ascending=False).head(10).reset_index(drop=True)
print(top10_vs_bhuvi)
         Batter  Runs_Against_Bhuvi
0      CH Gayle                 131
1       V Kohli                 129
2     SV Samson                 120
3       RR Pant                 120
4  F du Plessis                 109
5      KL Rahul                 109
6     AM Rahane                 104
7      MS Dhoni                 102
8   YBK Jaiswal                 101
9    AD Russell                  97
In [117]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top10_vs_bhuvi, x='Batter', y='Runs_Against_Bhuvi', palette='crest')
plt.title('Top 10 Batsmen Scoring Most Runs vs Bhuvneshwar Kumar', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs Scored')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [118]:
dismissal_counts = df['wicket_kind'].value_counts().reset_index()
dismissal_counts.columns = ['Dismissal_Type', 'Count']
print(dismissal_counts.head(10))
          Dismissal_Type  Count
0                 caught   8665
1                 bowled   2345
2                run out   1153
3                    lbw    853
4      caught and bowled    388
5                stumped    376
6             hit wicket     18
7           retired hurt     17
8            retired out      5
9  obstructing the field      3
In [119]:
plt.figure(figsize=(10, 6))
sns.barplot(data=dismissal_counts.head(10), x='Count', y='Dismissal_Type', palette='rocket')
plt.title('Most Common Dismissal Types in IPL', fontsize=14)
plt.xlabel('Count')
plt.ylabel('Dismissal Type')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [120]:
catch_df = df[df['wicket_kind'] == 'caught']
catch_df = catch_df[catch_df['fielders'].notna()]
catch_counts = catch_df['fielders'].value_counts().reset_index()
catch_counts.columns = ['Fielder', 'Catches']
top_catch_fielders = catch_counts.head(10)
print(top_catch_fielders)
          Fielder  Catches
0        MS Dhoni      158
1      KD Karthik      145
2  AB de Villiers      120
3         V Kohli      117
4        SK Raina      106
5       RA Jadeja      103
6       RG Sharma      101
7        S Dhawan      100
8      KA Pollard       97
9         WP Saha       93
In [121]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top_catch_fielders, x='Catches', y='Fielder', palette='viridis')
plt.title('Top 10 Fielders with Most Catches in IPL', fontsize=14)
plt.xlabel('Total Catches')
plt.ylabel('Fielder')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [122]:
all_fielding = df[df['fielders'].notna()]
fielder_total_dismissals = all_fielding['fielders'].value_counts().reset_index()
fielder_total_dismissals.columns = ['Fielder', 'Total_Fielding_Dismissals']

top_fielders = fielder_total_dismissals.head(10)
print(top_fielders)
          Fielder  Total_Fielding_Dismissals
0        MS Dhoni                        221
1      KD Karthik                        191
2  AB de Villiers                        134
3      RV Uthappa                        128
4         V Kohli                        126
5         WP Saha                        121
6       SV Samson                        113
7        SK Raina                        112
8       RA Jadeja                        109
9         RR Pant                        106
In [123]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top_fielders, x='Total_Fielding_Dismissals', y='Fielder', palette='magma')
plt.title('Best Fielders in IPL (Total Dismissals)', fontsize=14)
plt.xlabel('Total Fielding Contributions')
plt.ylabel('Fielder')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]: